home *** CD-ROM | disk | FTP | other *** search
/ Skunkware 5 / Skunkware 5.iso / src / Tools / freeWAIS-sf-1.1 / ir / boolean_op.c < prev    next >
Encoding:
C/C++ Source or Header  |  1994-05-29  |  14.6 KB  |  507 lines

  1. /* 
  2.  * boolean_op.c -- 
  3.  * SCCS Status     : %W%    %G%
  4.  * Author          : Huynh Quoc T. Tung
  5.  * Created On      : Sun Oct 17 16:40:46 1993
  6.  * Last Modified By: Huynh Quoc T. Tung
  7.  * Last Modified On: Mon May 30 14:28:41 1994
  8.  * Update Count    : 27
  9.  * Status          : Unknown, Use with caution!
  10.  */
  11.  
  12. /*********************   PROCEDURE DESCRIPTION   ************************
  13.  * Data structure used is the pushdown stack.
  14.  * Basic operations:
  15.  * push(v)
  16.  * long v;
  17.  *
  18.  * push an item onto the stack (insert it at the beginning)
  19.  *
  20.  * pop() : pop an item (remove it from the beginning)
  21.  * 
  22.  * void Union(result, set1, set2)
  23.  * search_result_struct *result;
  24.  * search_result_struct *set1;
  25.  * search_result_struct *set2;
  26.  *
  27.  * union two sets and write it into result.
  28.  *
  29.  * void Intersection(result, set1, set2)
  30.  * search_result_struct * result;
  31.  * search_result_struct * set1;
  32.  * search_result_struct * set2;
  33.  * 
  34.  * intersection two sets and write it into result.
  35.  *
  36.  * void Difference(result, set1, set2)
  37.  * search_result_struct *result;
  38.  * search_result_struct *set1;
  39.  * search_result_struct *set2;
  40.  *
  41.  * elements of set1 which are common to elements of set2 will be ignored.
  42.  *
  43.  */
  44.  
  45. /*
  46. #include <stdio.h>
  47. #include <string.h>
  48. */
  49. #include "cdialect.h"
  50. #include "irfiles.h"
  51. #include "boolean_op.h"
  52. #ifdef NEW_WEIGHT /* tung , 5/94 */
  53. #include "irtfiles.h"
  54. #endif
  55.  
  56. struct node
  57.   long key; 
  58.   struct node *next;
  59. };
  60. struct node *head, *z, *t;
  61.  
  62. search_result_struct *end_result = NULL;
  63.  
  64. boolean IsOperator(op)
  65.      char *op;
  66. {
  67.   if(!strcmp(op,"and") ||
  68.      !strcmp(op,"or") ||
  69.      !strcmp(op,"not"))
  70.     return(true);
  71.   else return(false);
  72. }
  73.  
  74. void stackinit()
  75. {
  76.   head = (struct node *) malloc(sizeof (* head));
  77.   z = (struct node *) malloc(sizeof(* z));
  78.   head->next = z; 
  79.   head->key = 0;
  80.   z->next = z;
  81. }
  82.  
  83. void push(v)
  84.      long v;
  85. {
  86.   t = (struct node *) malloc(sizeof(* t));
  87.   t->key = v;
  88.   t->next = head->next;
  89.   head->next = t;
  90. }
  91.  
  92. long pop()
  93. {
  94.   long x;
  95.   
  96.   t = head->next;
  97.   head->next = t->next;
  98.   x = t->key;
  99.   free(t);
  100.   return x;
  101. }
  102.  
  103. int stackempty()
  104. {
  105.   return head->next == z;
  106. }
  107.  
  108. void Union(result, set1, set2)
  109.      search_result_struct *result;
  110.      search_result_struct *set1;
  111.      search_result_struct *set2;
  112. {
  113.   while((set1->number_of_hits != 0) &&
  114.         (set2->number_of_hits != 0)) {
  115.     if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
  116.       result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
  117.       result->doc_ids_array->score = set1->doc_ids_array->score;
  118.       ++result->number_of_hits;
  119.       ++result->doc_ids_array ;
  120.       --set1->number_of_hits;
  121.       ++set1->doc_ids_array; 
  122.     }
  123.     else if(set1->doc_ids_array->doc_id > set2->doc_ids_array->doc_id) {
  124.       result->doc_ids_array->doc_id = set2->doc_ids_array->doc_id;
  125.       result->doc_ids_array->score = set2->doc_ids_array->score;
  126.       ++result->number_of_hits;
  127.       ++result->doc_ids_array ;
  128.       --set2->number_of_hits;
  129.       ++set2->doc_ids_array;
  130.     }
  131.     else { /* doc_id1 == doc_id2  */
  132.       result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
  133.       result->doc_ids_array->score = set1->doc_ids_array->score + set2->doc_ids_array->score;
  134.       ++result->number_of_hits;
  135.       ++result->doc_ids_array ;
  136.       --set1->number_of_hits; --set2->number_of_hits;
  137.       ++set1->doc_ids_array; ++set2->doc_ids_array;
  138.     }
  139.   }
  140.   if((set1->number_of_hits == 0) &&
  141.      (set2->number_of_hits != 0)) {
  142.     memcpy((char *)result->doc_ids_array, 
  143.            (char *)set2->doc_ids_array, 
  144.            set2->number_of_hits * sizeof(doc_descr_struct));
  145.     set2->doc_ids_array += set2->number_of_hits;
  146.     result->doc_ids_array += set2->number_of_hits;
  147.     result->number_of_hits += set2->number_of_hits;
  148.     set2->number_of_hits = 0;
  149.   }
  150.   else if((set1->number_of_hits != 0) &&
  151.           (set2->number_of_hits == 0)) {
  152.     memcpy((char *)result->doc_ids_array, 
  153.            (char *)set1->doc_ids_array, 
  154.            set1->number_of_hits * sizeof(doc_descr_struct));
  155.     set1->doc_ids_array += set1->number_of_hits;
  156.     result->doc_ids_array += set1->number_of_hits;
  157.     result->number_of_hits += set1->number_of_hits;
  158.     set1->number_of_hits = 0;
  159.   }
  160. }
  161.  
  162. void Or_Operator(op1, op2)
  163.      search_result_struct* op1;
  164.      search_result_struct* op2;
  165. {
  166.   long doc_ids_array_size = sizeof(doc_descr_struct);
  167.   long op1_number_of_hits, op2_number_of_hits;
  168.   
  169.   op1_number_of_hits = op1->number_of_hits;
  170.   op2_number_of_hits = op2->number_of_hits;
  171.   
  172.   if(op1->number_of_hits == 0) {
  173.     if(op1->doc_ids_array != NULL)
  174.       s_free(op1->doc_ids_array);
  175.     if(op2->number_of_hits > 0) {
  176.       memcpy((char *)end_result->doc_ids_array, 
  177.              (char *)op2->doc_ids_array, 
  178.              doc_ids_array_size * op2->number_of_hits);
  179.     }
  180.     end_result->number_of_hits = op2->number_of_hits;
  181.     push(op2->operand_id);
  182.   }
  183.   else if(op2->number_of_hits == 0) {
  184.     if(op2->doc_ids_array != NULL)
  185.       s_free(op2->doc_ids_array);
  186.     if(op1->number_of_hits > 0) {
  187.       memcpy((char *)end_result->doc_ids_array, 
  188.              (char *)op1->doc_ids_array, 
  189.              doc_ids_array_size * op1->number_of_hits);
  190.     }
  191.     end_result->number_of_hits = op1->number_of_hits;
  192.     push(op1->operand_id);
  193.   } 
  194.   else if((op1->number_of_hits != 0) &&
  195.           (op2->number_of_hits != 0)) { 
  196.     end_result->number_of_hits = 0;
  197.     
  198.     Union(end_result, op1, op2);
  199.     
  200.     op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
  201.     op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
  202.     end_result->doc_ids_array -= end_result->number_of_hits;
  203.     op2->number_of_hits = end_result->number_of_hits;
  204.     s_free(op1->doc_ids_array);
  205.     if(end_result->number_of_hits > 0) {
  206.       memcpy((char *)op2->doc_ids_array, 
  207.              (char *)end_result->doc_ids_array, 
  208.              end_result->number_of_hits * doc_ids_array_size);
  209.     }
  210.     else {
  211.       op2->number_of_hits = end_result->number_of_hits;
  212.       s_free(op2->doc_ids_array);
  213.     }
  214.     push(op2->operand_id);
  215.   }
  216. }
  217.  
  218. void Intersection(result, set1, set2)
  219.      search_result_struct * result;
  220.      search_result_struct * set1;
  221.      search_result_struct * set2;
  222. {
  223.   while((set1->number_of_hits != 0) &&
  224.         (set2->number_of_hits != 0)) {
  225.     if(set1->doc_ids_array->doc_id == set2->doc_ids_array->doc_id) {
  226.       result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
  227. #ifdef NEW_WEIGHT /* tung , 5/94 */
  228.       result->doc_ids_array->score = MINIMUM(set1->doc_ids_array->score, set2->doc_ids_array->score);
  229. #else
  230.       result->doc_ids_array->score = set1->doc_ids_array->score + set2->doc_ids_array->score;
  231. #endif
  232.       ++result->number_of_hits;
  233.       ++result->doc_ids_array ;
  234.       --set1->number_of_hits; --set2->number_of_hits;
  235.       ++set1->doc_ids_array ; ++set2->doc_ids_array ;
  236.     }
  237.     else if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
  238.       --set1->number_of_hits;
  239.       ++set1->doc_ids_array ;
  240.     }
  241.     else /* doc_id1 > doc_id2 */
  242.       {
  243.         --set2->number_of_hits;
  244.         ++set2->doc_ids_array ;
  245.       }
  246.   }
  247. }
  248.  
  249. void And_Operator(op1, op2)
  250.      search_result_struct* op1;
  251.      search_result_struct* op2;
  252. {
  253.   long doc_ids_array_size = sizeof(doc_descr_struct);
  254.   long op1_number_of_hits, op2_number_of_hits;
  255.   
  256.   op1_number_of_hits = op1->number_of_hits;
  257.   op2_number_of_hits = op2->number_of_hits;
  258.   
  259.   if(op1->number_of_hits == 0) {
  260.     end_result->number_of_hits = 0;
  261.     if(op1->doc_ids_array != NULL)
  262.       s_free(op1->doc_ids_array);
  263.     if(op2->doc_ids_array != NULL)
  264.       s_free(op2->doc_ids_array);
  265.     push(op1->operand_id);
  266.   }
  267.   else if(op2->number_of_hits == 0) {
  268.     end_result->number_of_hits = 0;
  269.     if(op1->doc_ids_array != NULL)
  270.       s_free(op1->doc_ids_array);
  271.     if(op2->doc_ids_array != NULL)
  272.       s_free(op2->doc_ids_array);
  273.     push(op2->operand_id);
  274.   }
  275.   else if((op1->number_of_hits != 0) &&
  276.           (op2->number_of_hits != 0)) {
  277.     end_result->number_of_hits = 0;
  278.  
  279.     Intersection(end_result, op1, op2);
  280.  
  281.     op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
  282.     op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
  283.     end_result->doc_ids_array -= end_result->number_of_hits;
  284.     op2->number_of_hits = end_result->number_of_hits;
  285.     s_free(op1->doc_ids_array);
  286.  
  287.     if(end_result->number_of_hits > 0) {
  288.       memcpy((char *)op2->doc_ids_array, 
  289.              (char *)end_result->doc_ids_array, 
  290.              end_result->number_of_hits * doc_ids_array_size);
  291.     }
  292.     else {
  293.       op2->number_of_hits = end_result->number_of_hits;
  294.       s_free(op2->doc_ids_array);
  295.     }
  296.     push(op2->operand_id);
  297.   }
  298. }
  299.  
  300. void Difference(result, set1, set2)
  301.      search_result_struct *result;
  302.      search_result_struct *set1;
  303.      search_result_struct *set2;
  304. {
  305.   while((set1->number_of_hits != 0) &&
  306.         (set2->number_of_hits != 0)) {
  307.     if(set1->doc_ids_array->doc_id == set2->doc_ids_array->doc_id) {
  308. #ifdef NEW_WEIGHT /* tung , 5/94 */
  309.       result->doc_ids_array->score = MINIMUM(set1->doc_ids_array->score, 1.0 - set2->doc_ids_array->score);
  310.       if(result->doc_ids_array->score < 0)
  311.     result->doc_ids_array->score = 0;
  312. #endif
  313.       --set1->number_of_hits; --set2->number_of_hits;
  314.       ++set1->doc_ids_array; ++set2->doc_ids_array;
  315.     }
  316.     else if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
  317.       result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
  318.       result->doc_ids_array->score = set1->doc_ids_array->score;
  319.       ++result->number_of_hits;
  320.       ++result->doc_ids_array;
  321.       --set1->number_of_hits;
  322.       ++set1->doc_ids_array;
  323.     }
  324.     else /* doc_id1 > doc_id2 */ {
  325.       --set2->number_of_hits;
  326.       ++set2->doc_ids_array;
  327.     }
  328.   }
  329.   if((set1->number_of_hits != 0) &&
  330.      (set2->number_of_hits == 0)) {
  331.     memcpy((char *)result->doc_ids_array, 
  332.            (char *)set1->doc_ids_array, 
  333.            set1->number_of_hits * sizeof(doc_descr_struct));
  334.     set1->doc_ids_array += set1->number_of_hits;
  335.     result->doc_ids_array += set1->number_of_hits;
  336.     result->number_of_hits += set1->number_of_hits;
  337.     set1->number_of_hits = 0;
  338.   }
  339. }
  340.  
  341. void Not_Operator( op1, op2)
  342.      search_result_struct* op1;
  343.      search_result_struct* op2;
  344. {
  345.   long doc_ids_array_size = sizeof(doc_descr_struct);
  346.   long op1_number_of_hits, op2_number_of_hits;
  347.   
  348.   op1_number_of_hits = op1->number_of_hits;
  349.   op2_number_of_hits = op2->number_of_hits;
  350.   
  351.   if(op1->number_of_hits == 0) {
  352.     end_result->number_of_hits = 0;
  353.     if(op1->doc_ids_array != NULL)
  354.       s_free(op1->doc_ids_array);
  355.     if(op2->doc_ids_array != NULL)
  356.       s_free(op2->doc_ids_array);
  357.     push(op1->operand_id);
  358.   }
  359.   else if(op2->number_of_hits == 0) {
  360.     if(op2->doc_ids_array != NULL)
  361.       s_free(op2->doc_ids_array);
  362.     if(op1->number_of_hits > 0) {
  363.       memcpy((char *)end_result->doc_ids_array, 
  364.              (char *)op1->doc_ids_array, 
  365.              doc_ids_array_size * op1->number_of_hits + 1);
  366.     }
  367.     end_result->number_of_hits = op1->number_of_hits;
  368.     push(op1->operand_id);
  369.   }
  370.   else if((op1->number_of_hits != 0) &&
  371.           (op2->number_of_hits != 0)) {
  372.     end_result->number_of_hits = 0;
  373.     
  374.     Difference(end_result, op1, op2);
  375.     
  376.     op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
  377.     op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
  378.     end_result->doc_ids_array -= end_result->number_of_hits;
  379.     op1->number_of_hits = end_result->number_of_hits;
  380.     s_free(op2->doc_ids_array);
  381.     if(end_result->number_of_hits > 0) {
  382.       memcpy((char *)op1->doc_ids_array, 
  383.              (char *)end_result->doc_ids_array, 
  384.              end_result->number_of_hits * doc_ids_array_size);
  385.     }
  386.     else {
  387.       op1->number_of_hits = end_result->number_of_hits;
  388.       s_free(op2->doc_ids_array);
  389.     }
  390.     push(op1->operand_id);
  391.   }
  392. }
  393.  
  394. boolean stackinitialized = false;
  395.  
  396. boolean boolean_operations(operator, result_array)
  397.      char *operator;
  398.      search_result_struct* result_array;
  399. {
  400.   long operand_id1, operand_id2;
  401.   int i;
  402.   
  403.   operand_id1 = operand_id2 = -1L;
  404.   
  405.   if (!stackinitialized) {
  406.     stackinit();
  407.     stackinitialized = true;
  408.   }
  409.   if (!strcmp(operator, "or")) {
  410.     if(stackempty())
  411.       operand_id1 = -1L;
  412.     else operand_id1 = pop();
  413.     if(stackempty())
  414.       operand_id2 = -1L;
  415.     else operand_id2 = pop();
  416.     if((operand_id1 == -1) || (operand_id2 == -1)) {
  417.       waislog(WLOG_HIGH, WLOG_ERROR,
  418.               "boolean search failed, too few operands.\n");
  419.       return(false);
  420.     }
  421.     else Or_Operator(&result_array[operand_id1], &result_array[operand_id2]);
  422.   }
  423.   else if (!strcmp(operator, "and")) {
  424.     if(stackempty())
  425.       operand_id1 = -1L;
  426.     else operand_id1 = pop();
  427.     if(stackempty())
  428.       operand_id2 = -1L;
  429.     else operand_id2 = pop();
  430.     if((operand_id1 == -1) || (operand_id2 == -1)) {
  431.       waislog(WLOG_HIGH, WLOG_ERROR,
  432.               "boolean search failed, too few operands.\n");
  433.       return(false);
  434.     }
  435.     else And_Operator(&result_array[operand_id1], &result_array[operand_id2]);
  436.   }
  437.   else if (!strcmp(operator, "not")) {
  438.     if(stackempty())
  439.       operand_id1 = -1L;
  440.     else operand_id1 = pop();
  441.     if(stackempty())
  442.       operand_id2 = -1L;
  443.     else operand_id2 = pop();
  444.     if((operand_id1 == -1) || (operand_id2 == -1)) {
  445.       waislog(WLOG_HIGH, WLOG_ERROR,
  446.               "boolean search failed, too few operands.\n");
  447.       return(false);
  448.     }
  449.     else Not_Operator(&result_array[operand_id2], &result_array[operand_id1]);
  450.   }
  451. }
  452.  
  453. boolean save_operand_id(operand_id, search_result_array, entries)
  454.      long operand_id;
  455.      search_result_struct* search_result_array;
  456.      long entries;
  457. {
  458.   if (!stackinitialized) {
  459.     stackinit();
  460.     stackinitialized = true;
  461.   }
  462.   if(end_result == NULL) {
  463.     end_result = 
  464.       (search_result_struct *)s_malloc(sizeof(search_result_struct));
  465.     end_result->doc_ids_array = 
  466.       (doc_descr_struct *)s_malloc(sizeof(doc_descr_struct) * entries);
  467.     if(end_result->doc_ids_array == NULL) {
  468.       waislog(WLOG_HIGH, WLOG_ERROR, "Out of memory");
  469.       return(false);
  470.     }
  471.   }
  472.   end_result->number_of_hits = search_result_array[operand_id].number_of_hits;
  473.   if(search_result_array[operand_id].doc_ids_array != NULL) 
  474.     memcpy((char*)end_result->doc_ids_array,
  475.            (char*)search_result_array[operand_id].doc_ids_array,
  476.            search_result_array[operand_id].number_of_hits * 
  477.            sizeof(doc_descr_struct));
  478.   push(operand_id);
  479.   return(true);
  480. }
  481.  
  482. long retriev_result(entries, result)
  483.      long entries;
  484.      double* result;
  485. {
  486.   int doc_id, count;
  487.   long number_of_hits = 0;
  488.   
  489.   if((end_result != NULL) && (result != NULL))
  490.     for(count=0; count < end_result->number_of_hits; count++) {
  491.       doc_id = end_result->doc_ids_array[count].doc_id;
  492.       result[doc_id] = end_result->doc_ids_array[count].score;
  493.       ++number_of_hits;
  494.     }
  495.  
  496.   if(end_result != NULL) {
  497.     if(end_result->doc_ids_array != NULL)
  498.       s_free(end_result->doc_ids_array);
  499.     s_free(end_result);
  500.   }
  501.  
  502.   stackinitialized = false;
  503.   
  504.   return(number_of_hits);
  505.